Load functions

t1 <- Sys.time()
#source("../../hemp_busco_chrom/Rfunctions.R")
#source("~/gits/ViningLab_code/busco_lineplot/Rfunctions.R")
source("Rfunctions.R")

Set min_BUSCO

my_min_BUSCO <- 2
paste("min_BUSCO:", my_min_BUSCO)

[1] “min_BUSCO: 2”

Load Known or Seed

#busc <- read_busco("DM_6_busco/full_table.tsv")
busc <- read_busco("DM6_full_table.tsv")
busc$Sequence <- paste("DM6:", busc$Sequence, sep = "")
busc[1:3, ]
##    Busco_id   Status  Sequence Gene_Start Gene_End Strand  Score Length
## 1  2at71240 Complete DM6:chr04   59375834 59427686      - 5614.7   4099
## 2 10at71240 Complete DM6:chr02   42824870 42847588      - 6723.1   3829
## 3 17at71240 Complete DM6:chr02    8847920  8895418      - 6694.3   3910
p <- busco_bar(busc, max_copy = NULL)
#p <- busco_bar(busc, max_copy = 3)
p <- p + ggtitle("BUSCO: DM6")
p

Sort by chromosome into a list

my_bl <- make_known(busc)
lapply( my_bl[1:3], function(x){x[1:2, ]} )
## $`DM6:chr01`
##     Busco_id   Status  Sequence Gene_Start Gene_End Strand  Score Length
## 4  86at71240 Complete DM6:chr01   85678541 85699353      + 5229.1   3121
## 6 224at71240 Complete DM6:chr01   74708259 74728652      - 2768.8   1841
## 
## $`DM6:chr02`
##    Busco_id   Status  Sequence Gene_Start Gene_End Strand  Score Length
## 2 10at71240 Complete DM6:chr02   42824870 42847588      - 6723.1   3829
## 3 17at71240 Complete DM6:chr02    8847920  8895418      - 6694.3   3910
## 
## $`DM6:chr03`
##      Busco_id   Status  Sequence Gene_Start Gene_End Strand  Score Length
## 7  265at71240 Complete DM6:chr03   60542709 60565715      + 2667.9   1616
## 11 383at71240 Complete DM6:chr03   12561124 12575718      - 2711.2   1679

Load Unknown

#busc <- read_busco("ATL_busco/full_table.tsv")
busc <- read_busco("atlantic_full_table.tsv")
busc$Sequence <- paste("ATL:", busc$Sequence, sep = "")
busc[1:3, ]
##   Busco_id     Status    Sequence Gene_Start Gene_End Strand  Score Length
## 1 2at71240 Duplicated ATL:chr04_0   24891993 24938026      - 5601.4   4110
## 2 2at71240 Duplicated ATL:chr04_2   15369596 15415265      - 5604.0   4121
## 3 2at71240 Duplicated ATL:chr04_3   19561433 19608034      - 5600.6   4114
p <- busco_bar(busc)
#p <- busco_bar(busc, max_copy = 6)
p <- p + ggtitle("BUSCO: Atlantic")
p

Add unknown to seed

lapply( my_bl[1:3], function(x){x[1:2, ]} )
## $`DM6:chr01`
##     Busco_id   Status  Sequence Gene_Start Gene_End Strand  Score Length
## 4  86at71240 Complete DM6:chr01   85678541 85699353      + 5229.1   3121
## 6 224at71240 Complete DM6:chr01   74708259 74728652      - 2768.8   1841
## 
## $`DM6:chr02`
##    Busco_id   Status  Sequence Gene_Start Gene_End Strand  Score Length
## 2 10at71240 Complete DM6:chr02   42824870 42847588      - 6723.1   3829
## 3 17at71240 Complete DM6:chr02    8847920  8895418      - 6694.3   3910
## 
## $`DM6:chr03`
##      Busco_id   Status  Sequence Gene_Start Gene_End Strand  Score Length
## 7  265at71240 Complete DM6:chr03   60542709 60565715      + 2667.9   1616
## 11 383at71240 Complete DM6:chr03   12561124 12575718      - 2711.2   1679
my_bl <- add_unknown(my_bl, busc)

lapply( my_bl[1:3], function(x){x[1:2, ]} )
## $`DM6:chr01`
##     Busco_id   Status  Sequence Gene_Start Gene_End Strand  Score Length
## 4  86at71240 Complete DM6:chr01   85678541 85699353      + 5229.1   3121
## 6 224at71240 Complete DM6:chr01   74708259 74728652      - 2768.8   1841
##   ATL:chr01_4 ATL:chr01_1 ATL:chr01_0 ATL:chr01_2 ATL:chr01_3 ATL:Unplaced
## 4    65848858          NA          NA          NA          NA           NA
## 6          NA    50440194          NA          NA          NA           NA
##   ATL:chr03_1
## 4          NA
## 6          NA
## 
## $`DM6:chr02`
##    Busco_id   Status  Sequence Gene_Start Gene_End Strand  Score Length
## 2 10at71240 Complete DM6:chr02   42824870 42847588      - 6723.1   3829
## 3 17at71240 Complete DM6:chr02    8847920  8895418      - 6694.3   3910
##   ATL:chr02_1 ATL:chr02_2 ATL:chr02_0 ATL:chr02_3
## 2    36328988          NA          NA          NA
## 3     3993300          NA          NA          NA
## 
## $`DM6:chr03`
##      Busco_id   Status  Sequence Gene_Start Gene_End Strand  Score Length
## 7  265at71240 Complete DM6:chr03   60542709 60565715      + 2667.9   1616
## 11 383at71240 Complete DM6:chr03   12561124 12575718      - 2711.2   1679
##    ATL:chr03_1 ATL:chr03_3 ATL:chr03_2 ATL:chr07_1 ATL:chr03_0 ATL:chr03_4
## 7     48224495          NA          NA          NA          NA          NA
## 11          NA    14358968          NA          NA          NA          NA

Nudge, flip, min threshold

# gg_line_map( my_bl[[1]][, c(4, 9:ncol(my_bl[[1]]))], rect = TRUE, lalpha = 0.08 )
my_bl <- position_BUSCOS(my_bl, min_BUSCO = my_min_BUSCO, sort_by = "max_busco")

Name the list

my_bl_atl <- my_bl

Castle-Russet

#busc <- read_busco("DM_6_busco/full_table.tsv")
busc <- read_busco("DM6_full_table.tsv")
busc$Sequence <- paste("DM6:", busc$Sequence, sep = "")
my_bl <- make_known(busc)
#busc <- read_busco("ATL_busco/full_table.tsv")
#busc <- read_busco("castle_russet/full_table.tsv")
busc <- read_busco("castle-russet_full_table.tsv")
busc$Sequence <- paste("CR:", busc$Sequence, sep = "")
#busc[1:3, ]
p <- busco_bar(busc)
#p <- busco_bar(busc, max_copy = 6)
#p <- p + ggtitle("BUSCO: Atlantic")
p <- p + ggtitle("BUSCO: Castle-Russet")
p

#lapply( my_bl[1:3], function(x){x[1:2, ]} )

my_bl <- add_unknown(my_bl, busc)

#lapply( my_bl[1:3], function(x){x[1:2, ]} )

Nudge, flip, min threshold

#my_bl <- position_BUSCOS(my_bl, sort_by = "max_pos")
#
my_bl <- position_BUSCOS(my_bl, min_BUSCO = my_min_BUSCO, sort_by = "max_busco")

Plot

#for(i in 1:length(busco_l)){
for(i in 1:length(my_bl)){
  tmp <- my_bl[[i]]
#  colnames(tmp)[9:ncol(tmp)] <- paste("CR:", colnames(tmp)[9:ncol(tmp)], sep = "")
  #tmp[1:3, ]
  rownames(tmp) <- tmp$Busco_id
  tmp <- tmp[ , c(4, 9:ncol(tmp))]
  #colnames(tmp)[ colnames(tmp) == "Gene_Start" ] <- paste("DM6:", names(my_bl)[i], sep = "")
  colnames(tmp)[ colnames(tmp) == "Gene_Start" ] <- names(my_bl)[i]
  #tmp[1:3, ]
  
  tmp2 <- my_bl_atl[[i]]
#  colnames(tmp2)[9:ncol(tmp2)] <- paste("ATL:", colnames(tmp2)[9:ncol(tmp2)], sep = "")
  tmp <- cbind( tmp2[ , ncol(tmp2):9 ], tmp )
  
  #p <- gg_line_map(tmp)
  p <- gg_line_map(tmp, rect = TRUE, palpha = 1, size = 2.2, 
                   lalpha = 0.1, linewidth = 1.4, line_na_rm = TRUE)
  p <- p + ggtitle( paste("Chromosome:", names(my_bl)[i]) )
  print(p)
}
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

t99 <- Sys.time()
t99 - t1
## Time difference of 10.23115 secs